START

Goal/Purpose of operations: 
The DepMap/PRISM primary and secondary pooled drug screens were used to help evaluate whether a drug candidate was suitable (if that drug was tested). The primary screen calculated, for each cell line treated with a drug, the median across replicates of the log fold change in median fluorescence intensity. The PRISM study considered a cell line sensitive to a treatment if the median-collapsed fold change was less than 0.3. The secondary screen calculated the area under the dose-response curve (AUC) from a 4-parameter logistic fit. While the PRISM study did not provide a cut-off for sensitivity, lower AUC values were considered more sensitive, and we compared the AUC values of a drug candidate to the AUC of temozolomide, the standard treatment for GBM (~0.90).

Finished pseudocode on: 
220920

System which operations were done on:
my laptop

GitHub Repo:
Transfer_Learning_R03

Docker:
rstudio_cancer_dr

Directory of operations:
/home - docker

Scripts being edited for operations: 
NA

Data being used: 
DESeq2 and limma results 

Papers and tools:
NA

STEPS

Set working directory

library(ggplot2)
library(cowplot)
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
## 
##     get_legend
library(recount3)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
library(SummarizedExperiment)
library(viridis)
## Loading required package: viridisLite
source("/home/rstudio/script/functions_cancer_signature_reversion_JLF.R")
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ✔ purrr   0.3.4      
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse()        masks IRanges::collapse()
## ✖ dplyr::combine()         masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count()           masks matrixStats::count()
## ✖ dplyr::desc()            masks IRanges::desc()
## ✖ tidyr::expand()          masks S4Vectors::expand()
## ✖ dplyr::filter()          masks stats::filter()
## ✖ dplyr::first()           masks S4Vectors::first()
## ✖ dplyr::lag()             masks stats::lag()
## ✖ BiocGenerics::Position() masks ggplot2::Position(), base::Position()
## ✖ purrr::reduce()          masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename()          masks S4Vectors::rename()
## ✖ dplyr::slice()           masks IRanges::slice()

load in data

# DESeq2 results for GBM
deseq_results <- read.csv(
  "~/output/deseq2_gbm/220927_deseq2_gbm_normal_gtx_res.csv",
  sep = ","
)

# Gene annotation table; its gene_name column is attached to each DESeq2
# table by row position, so the row order must agree.
feature_info <- readRDS(
  "~/data/recount3/recount3_fix_download/feature_info.rds"
)
# Prints TRUE when the annotation gene ids are in the same order as the
# DESeq2 row names (the result is only printed, not enforced).
identical(feature_info$gene_id, rownames(deseq_results))
## [1] TRUE
deseq_results$Symbol <- feature_info$gene_name


# limma results for GBM
# The .rds bundle is not in GitHub, so the exported TSV is read instead.
# GBM_GTEX_gene_limma_res <- readRDS("~/output/limma_gbm/220421_GBM_GTEX_gene_limma_res.rds")
# GBM_GTEX_gene_limma <- GBM_GTEX_gene_limma_res$limma

GBM_GTEX_gene_limma <- read_tsv(
  "~/output/limma_gbm/220421_limma_TCGA_GTEX_T_vs_NT_limma_results.tsv"
)
## Rows: 63856 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): LV
## dbl (6): logFC, AveExpr, t, P.Value, adj.P.Val, B
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Disease signatures produced by each of the three methods
gbm_tfl_input <- readRDS(
  "~/output/TF_L_GBM/22004_SR_gene_list_gbm_tfl.rds"
)
gbm_deseq2_input <- readRDS(
  "~/output/deseq2_gbm/SR_gene_list_gbm_deseq2.rds"
)
gbm_limma_input <- readRDS(
  "~/output/limma_gbm/SR_gene_list_gbm_limma.rds"
)

# Liver (LIHC)

# DESeq2 results
LIHC_deseq_results <- read.csv(
  "~/output/liver_cancer/deseq2_res/220929_deseq2_lihc_normal_gtx_res.csv",
  sep = ","
)
# NOTE(review): symbols are attached by row position; the identical() order
# check run for the GBM table is not repeated for this table -- confirm the
# row order matches feature_info.
LIHC_deseq_results$Symbol <- feature_info$gene_name
# limma results (TSV export used in place of the .rds bundle)
# LIHC_GTEX_gene_limma_res <- readRDS("~/output/liver_cancer/limma_res/220808_LIHC_GTEX_gene_limma_res.rds")
# LIHC_GTEX_gene_limma <- LIHC_GTEX_gene_limma_res$limma
LIHC_GTEX_gene_limma <- read_tsv(
  "~/output/liver_cancer/limma_res/220808_limma_TCGA_GTEX_T_vs_NT_liver_cancer_limma_results.tsv"
)
## Rows: 63856 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): LV
## dbl (6): logFC, AveExpr, t, P.Value, adj.P.Val, B
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Disease signatures from each method
liver_tfl_input <- readRDS(
  "~/output/liver_cancer/TFL_res/SR_gene_list_liver_tfl.rds"
)
liver_deseq2_input <- readRDS(
  "~/output/liver_cancer/deseq2_res/SR_gene_list_liver_deseq2.rds"
)
liver_limma_input <- readRDS(
  "~/output/liver_cancer/limma_res/SR_gene_list_liver_limma.rds"
)

# Lung (LUAD)

# DESeq2 results
lung_deseq_results <- read.csv(
  "~/output/lung_cancer/deseq2_res/220929_deseq2_luad_normal_gtx_res.csv",
  sep = ","
)
# NOTE(review): symbols are attached by row position; the identical() order
# check run for the GBM table is not repeated for this table -- confirm the
# row order matches feature_info.
lung_deseq_results$Symbol <- feature_info$gene_name
# limma results (TSV export used in place of the .rds bundle)
# lung_GTEX_gene_limma_res <- readRDS("~/output/lung_cancer/limma_res/220808_LUAD_GTEX_gene_limma_res.rds")
# lung_GTEX_gene_limma <- lung_GTEX_gene_limma_res$limma
lung_GTEX_gene_limma <- read_tsv(
  "~/output/lung_cancer/limma_res/220808_limma_TCGA_GTEX_T_vs_NT_lung_cancer_limma_results.tsv"
)
## Rows: 63856 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): LV
## dbl (6): logFC, AveExpr, t, P.Value, adj.P.Val, B
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Disease signatures from each method
# NOTE(review): the TFL and limma signatures are read from deseq2_res/ here,
# while the liver and pancreas sections read theirs from TFL_res/ and
# limma_res/ -- confirm these two paths are the intended files.
lung_deseq2_input <- readRDS(
  "~/output/lung_cancer/deseq2_res/SR_gene_list_luad_deseq2.rds"
)
lung_tfl_input <- readRDS(
  "~/output/lung_cancer/deseq2_res/SR_gene_list_lung_tfl.rds"
)
lung_limma_input <- readRDS(
  "~/output/lung_cancer/deseq2_res/SR_gene_list_lung_limma.rds"
)

# Pancreatic cancer (PAAD)

# DESeq2 results
paad_deseq_results <- read.csv(
  "~/output/pancreas_cancer/deseq2_res/Deseq2_paad_normal_gtx_res.csv",
  sep = ","
)
# NOTE(review): symbols are attached by row position; the identical() order
# check run for the GBM table is not repeated for this table -- confirm the
# row order matches feature_info.
paad_deseq_results$Symbol <- feature_info$gene_name
# limma results (TSV export used in place of the .rds bundle)
# paad_GTEX_gene_limma_res <- readRDS("~/output/pancreas_cancer/limma_res/220808_PAAD_GTEX_gene_limma_res.rds")
# paad_GTEX_gene_limma <- paad_GTEX_gene_limma_res$limma
paad_GTEX_gene_limma <- read_tsv(
  "~/output/pancreas_cancer/limma_res/220808_limma_TCGA_GTEX_T_vs_NT_pancreas_cancer_limma_results.tsv"
)
## Rows: 63856 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (1): LV
## dbl (6): logFC, AveExpr, t, P.Value, adj.P.Val, B
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Disease signatures from each method
paad_tfl_input <- readRDS(
  "~/output/pancreas_cancer/TFL_res/SR_gene_list_paad_tfl.rds"
)
paad_limma_input <- readRDS(
  "~/output/pancreas_cancer/limma_res/SR_gene_list_paad_limma.rds"
)
paad_deseq2_input <- readRDS(
  "~/output/pancreas_cancer/deseq2_res/SR_gene_list_paad_deseq2.rds"
)

need to create the gene venn diagram for liver cancer

compare gene list across the different methods

# Venn diagrams of the liver signature genes across limma, DESeq2 and
# transfer learning: up genes, down genes, then both sets combined.
# venn_dia_methods() is a project helper not defined in this file.
venn_dia_methods(
  liver_limma_input$up,
  liver_deseq2_input$up,
  liver_tfl_input$up,
  file_name = "~/output/liver_cancer/SR_up_genes_venn_diagramm.png"
)
venn_dia_methods(
  liver_limma_input$down,
  liver_deseq2_input$down,
  liver_tfl_input$down,
  file_name = "~/output/liver_cancer/SR_down_genes_venn_diagramm.png"
)
venn_dia_methods(
  unlist(liver_limma_input),
  unlist(liver_deseq2_input),
  unlist(liver_tfl_input),
  file_name = "~/output/liver_cancer/SR_all_genes_venn_diagramm.png"
)

GBM

# GBM: volcano plot of the DESeq2 results with the DESeq2 and transfer
# learning signatures (volcano_plots() is a project helper not defined here)
deseq_gbm_filter <- volcano_plots(
  deseq_results, "DESeq2", gbm_deseq2_input, gbm_tfl_input
)
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length

## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length

# How many "Transfer Learning" rows have padj below 0.05
with(deseq_gbm_filter, table(padj[groups == "Transfer Learning"] < 0.05))
## 
## FALSE  TRUE 
##    32    72
# Same for the limma results
limma_gbm_filter <- volcano_plots(
  GBM_GTEX_gene_limma, "limma", gbm_limma_input, gbm_tfl_input
)
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length

## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length

with(limma_gbm_filter, table(padj[groups == "Transfer Learning"] < 0.05))
## 
## FALSE  TRUE 
##    42    52

liver

# Liver: volcano plot of the DESeq2 results with the DESeq2 and transfer
# learning signatures (volcano_plots() is a project helper not defined here)
deseq_liver_filter <- volcano_plots(
  LIHC_deseq_results, "DESeq2", liver_deseq2_input, liver_tfl_input
)
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length

## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length

# How many "Transfer Learning" rows have padj below 0.05
with(deseq_liver_filter, table(padj[groups == "Transfer Learning"] < 0.05))
## 
## FALSE  TRUE 
##    14    74
# Same for the limma results
limma_liver_filter <- volcano_plots(
  LIHC_GTEX_gene_limma, "limma", liver_limma_input, liver_tfl_input
)
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length

## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length

with(limma_liver_filter, table(padj[groups == "Transfer Learning"] < 0.05))
## 
## FALSE  TRUE 
##     4    79

Lung

# Lung: volcano plot of the DESeq2 results with the DESeq2 and transfer
# learning signatures (volcano_plots() is a project helper not defined here)
deseq_lung_filter <- volcano_plots(
  lung_deseq_results, "DESeq2", lung_deseq2_input, lung_tfl_input
)
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length

## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length

# How many "Transfer Learning" rows have padj below 0.05
with(deseq_lung_filter, table(padj[groups == "Transfer Learning"] < 0.05))
## 
## FALSE  TRUE 
##    11   105
# Same for the limma results
limma_lung_filter <- volcano_plots(
  lung_GTEX_gene_limma, "limma", lung_limma_input, lung_tfl_input
)
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length

## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length

with(limma_lung_filter, table(padj[groups == "Transfer Learning"] < 0.05))
## 
## FALSE  TRUE 
##    11    91
# Pancreas (PAAD): volcano plot of the DESeq2 results with the DESeq2 and
# transfer learning signatures (volcano_plots() is a project helper not
# defined here)
deseq_paad_filter <- volcano_plots(
  paad_deseq_results, "DESeq2", paad_deseq2_input, paad_tfl_input
)
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length

## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length

# How many "Transfer Learning" rows have padj below 0.05
with(deseq_paad_filter, table(padj[groups == "Transfer Learning"] < 0.05))
## 
## FALSE  TRUE 
##    83    30
# Same for the limma results
limma_paad_filter <- volcano_plots(
  paad_GTEX_gene_limma, "limma", paad_limma_input, paad_tfl_input
)
## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length

## Warning in upset_v2[i] <- gene_info_LINCS$gene_id[gene_list[i] ==
## gene_info_LINCS$gene_symbol]: number of items to replace is not a multiple of
## replacement length

with(limma_paad_filter, table(padj[groups == "Transfer Learning"] < 0.05))
## 
## FALSE  TRUE 
##    65    42

GBM

# Compare the GBM DESeq2 and limma result tables using the project helper
# compare_deseq2_limma() (not defined in this file). The optional
# x_p1/y_p1/x_p2/y_p2 arguments passed for the other cancers are left at
# their defaults here.
compare_deseq2_limma(deseq_results, GBM_GTEX_gene_limma)

## Warning: Removed 7 rows containing non-finite values (stat_bin2d).
## Warning: Removed 7 rows containing non-finite values (stat_cor).

liver

# DESeq2 vs. limma comparison for liver, lung and pancreas, in that order.
# NOTE(review): x_p1/y_p1 and x_p2/y_p2 look like plot annotation
# coordinates for the two panels -- confirm against compare_deseq2_limma().

# liver
compare_deseq2_limma(
  LIHC_deseq_results, LIHC_GTEX_gene_limma,
  x_p1 = 2, y_p1 = 20, x_p2 = 100, y_p2 = 400
)

# lung
compare_deseq2_limma(
  lung_deseq_results, lung_GTEX_gene_limma,
  x_p1 = -5, y_p1 = 20, x_p2 = 100, y_p2 = 400
)

# pancreas
compare_deseq2_limma(
  paad_deseq_results, paad_GTEX_gene_limma,
  x_p1 = -5, y_p1 = 20, x_p2 = 25, y_p2 = 50
)

# Summary bar plot of DESeq2-vs-limma Spearman coefficients, one facet per
# cancer and one bar per metric (logFC and adjusted p-value).
# NOTE(review): the coefficients are hard-coded; presumably transcribed from
# the compare_deseq2_limma() plots -- confirm before reuse.

# Facet labels, written once and reused as the factor levels so the facets
# keep this order. The stray leading space on the pancreatic label is removed.
cancers <- c(
  "glioblastoma",
  "liver hepatocellular \ncarcinoma",
  "lung \nadenocarcinoma",
  "pancreatic \nadenocarcinoma"
)
cancers <- factor(cancers, levels = cancers)

# First four values are logFC, last four are adjusted p-value, each in the
# order of `cancers`. The cancer column is repeated explicitly rather than
# relying on data.frame() silently recycling a length-4 vector to 8 rows.
deseq2_limma_sum <- data.frame(
  cancers = rep(cancers, times = 2),
  spearman = c(0.94, 0.69, 0.81, 0.81, 0.72, 0.55, 0.61, 0.55),
  Metric = rep(c("logFC", "Adj. p-value"), each = length(cancers))
)

ggplot(deseq2_limma_sum, aes(x = Metric, y = spearman, fill = Metric)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black") +
  ylab("Spearman Coefficient") +
  facet_wrap(~ cancers, ncol = 4) +
  xlab("Metric") +
  scale_fill_viridis_d(option = "E") +
  # The fill legend identifies the metric, so the x tick labels are hidden.
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
  theme(text = element_text(size = 35, face = "bold"))

# GBM: LINCS signature-reversion result tables for the three methods
library(readr)
deseq_results_all <- read_csv(
  "~/output/deseq2_gbm/220928_SR_LINCS_GBM_DESEQ2_RES.csv"
)
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# File name taken from the limma GBM script
limma_results_all <- read_csv(
  "~/output/limma_gbm/220421_SR_LINCS_GBM_LIMMA_RES.csv"
)
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
tfl_results_all <- read_csv(
  "~/output/TF_L_GBM/220808_SR_LINCS_GBM_TRL_RES.csv"
)
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Compare the three methods' results (project helper, not defined here)
lincs_method_comparsion(
  deseq_results_all, limma_results_all, tfl_results_all,
  cell_line = "GI1"
)

# Liver: LINCS signature-reversion result tables for the three methods

deseq_results_all_liver <- read_csv(
  "~/output/liver_cancer/deseq2_res/220929_SR_LINCS_LIHC_DESEQ2_RES.csv"
)
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
limma_results_all_liver <- read_csv(
  "~/output/liver_cancer/220808_SR_LINCS_LIVER_LIMMA_RES.csv"
)
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
tfl_results_all_liver <- read_csv(
  "~/output/liver_cancer/TFL_res/220808_SR_LINCS_LIVER_TFL_RES.csv"
)
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Compare the three methods' results (project helper, not defined here)
lincs_method_comparsion(
  deseq_results_all_liver, limma_results_all_liver, tfl_results_all_liver,
  cell_line = "HEPG2"
)

# Lung: LINCS signature-reversion result tables for the three methods

deseq_results_all_lung <- read_csv(
  "~/output/lung_cancer/deseq2_res/220929_SR_LINCS_LUAD_DESEQ2_RES.csv"
)
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
limma_results_all_lung <- read_csv(
  "~/output/lung_cancer/limma_res/220808_SR_LINCS_LUNG_LIMMA_RES.csv"
)
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
tfl_results_all_lung <- read_csv(
  "~/output/lung_cancer/220601_SR_LINCS_LUNG_TFL_RES.csv"
)
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Compare the three methods' results (project helper, not defined here)
# NOTE(review): "A529" looks like a typo for the lung adenocarcinoma cell
# line "A549" -- confirm against the values in the `cell` column and how
# lincs_method_comparsion() uses `cell_line`.
lincs_method_comparsion(
  deseq_results_all_lung, limma_results_all_lung, tfl_results_all_lung,
  cell_line = "A529"
)

paad

# Pancreas: LINCS signature-reversion result tables for the three methods
deseq_results_all_PAAD <- read_csv(
  "~/output/pancreas_cancer/deseq2_res/220929_SR_LINCS_PAAD_DESEQ2_RES.csv"
)
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
limma_results_all_PAAD <- read_csv(
  "~/output/pancreas_cancer/limma_res/220808_SR_LINCS_PANCREAS_LIMMA_RES.csv"
)
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
tfl_results_all_PAAD <- read_csv(
  "~/output/pancreas_cancer/220808_SR_LINCS_PANCREAS_TFL_RES.csv"
)
## New names:
## Rows: 101544 Columns: 14
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): pert, PCID, cell, type, trend, t_gn_sym dbl (8): ...1, WTCS, WTCS_Pval,
## WTCS_FDR, NCS, NCSct, N_upset, N_downset
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Compare the three methods' results (project helper, not defined here)
lincs_method_comparsion(
  deseq_results_all_PAAD, limma_results_all_PAAD, tfl_results_all_PAAD,
  cell_line = "YAPC"
)

#plot all of them together

# Summary plot of the pairwise method comparisons on the LINCS results:
# one facet per (comparison, cancer) pair and one bar per metric (NCS, FDR).
#
# Each vector below holds 12 values: three method comparisons (in the order
# of `comp`) for each of four cancers (in the order of `cancer`).
# NOTE(review): the coefficients and significance flags are hard-coded;
# presumably transcribed from the lincs_method_comparsion() output above --
# confirm before reuse.
NCS_sp <- c(
  0.59, 0.3, 0.37,
  0.18, 0.036, 0.4,
  0.029, -0.026, 0.27,
  0.34, 0.28, 0.36
)
FDR_sp <- c(
  0.38, 0.16, 0.13,
  -0.012, -0.048, 0.18,
  0.053, -0.058, -0.036,
  0.12, 0.16, 0.23
)
NCS_test <- c(
  TRUE, TRUE, TRUE,
  TRUE, FALSE, TRUE,
  FALSE, FALSE, TRUE,
  TRUE, TRUE, TRUE
)
FDR_test <- c(
  TRUE, TRUE, TRUE,
  FALSE, FALSE, TRUE,
  FALSE, FALSE, FALSE,
  TRUE, TRUE, TRUE
)
#c("glioblastoma", "liver hepatocellular carcinoma",  "lung adenocarcinoma", " pancreatic adenocarcinoma")
cancer <- c(
  rep("glioblastoma", 3),
  rep("liver hepatocellular \ncarcinoma", 3),
  rep("lung \nadenocarcinoma", 3),
  rep("pancreatic \nadenocarcinoma", 3)
)
# One set of three comparison labels per cancer. Repeating 8 times (as
# before) gave 24 labels, which made data.frame() silently recycle the
# 12-value columns and duplicate every row.
comp <- rep(
  c(
    "DESeq2 vs. \nlimma",
    "DESeq2 vs \nTransfer Learning",
    "limma vs \nTransfer Learning"
  ),
  times = 4
)
stopifnot(
  length(comp) == length(cancer),
  length(NCS_sp) == length(cancer),
  length(FDR_sp) == length(cancer),
  length(NCS_test) == length(cancer),
  length(FDR_test) == length(cancer)
)

lincs_result_compare <- data.frame(
  NCS_sp, FDR_sp, NCS_test, FDR_test, cancer, comp
)
lincs_result_compare_longer <- pivot_longer(
  lincs_result_compare,
  cols = c(NCS_sp, FDR_sp),
  values_to = "spearman",
  names_to = "metric_1"
)
lincs_result_compare_longer2 <- pivot_longer(
  lincs_result_compare_longer,
  cols = c(NCS_test, FDR_test),
  values_to = "Significant",
  names_to = "metric_2"
)
# The two pivots form a cross product (every coefficient paired with both
# significance flags). Keep only rows where the coefficient and the flag
# belong to the same metric, so NCS is never paired with the FDR test.
lincs_result_compare_longer2 <- lincs_result_compare_longer2[
  strtrim(lincs_result_compare_longer2$metric_1, 3) ==
    strtrim(lincs_result_compare_longer2$metric_2, 3),
]
lincs_result_compare_longer2$metric <- strtrim(
  lincs_result_compare_longer2$metric_1, 3
)
# Significance marker per row; it is not drawn by the plot below.
lincs_result_compare_longer2$symbol <- ifelse(
  lincs_result_compare_longer2$Significant, "*", ""
)

ggplot(
  lincs_result_compare_longer2,
  aes(x = metric, y = spearman, fill = metric)
) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black") +
  ylab("Spearman Coefficient") +
  xlab("Metric") +
  # The fill legend identifies the metric, so the x tick labels are hidden.
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
  facet_grid(comp ~ cancer) +
  scale_fill_viridis_d(option = "E") +
  theme(text = element_text(size = 20, face = "bold"))

END

Location of final scripts:
scripts

Location of data produced:
output

Dates when operations were done:
220920

Versions

# Record the R version, platform and attached/loaded package versions used
# for this run.
sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] forcats_0.5.2               stringr_1.4.1              
##  [3] dplyr_1.0.10                purrr_0.3.4                
##  [5] readr_2.1.2                 tidyr_1.2.1                
##  [7] tibble_3.1.8                tidyverse_1.3.2            
##  [9] viridis_0.6.2               viridisLite_0.4.1          
## [11] recount3_1.4.0              SummarizedExperiment_1.24.0
## [13] Biobase_2.54.0              GenomicRanges_1.46.1       
## [15] GenomeInfoDb_1.30.1         IRanges_2.28.0             
## [17] S4Vectors_0.32.4            BiocGenerics_0.40.0        
## [19] MatrixGenerics_1.6.0        matrixStats_0.62.0         
## [21] ggpubr_0.4.0                cowplot_1.1.1              
## [23] ggplot2_3.3.6              
## 
## loaded via a namespace (and not attached):
##   [1] googledrive_2.0.0        colorspace_2.0-3         ggsignif_0.6.3          
##   [4] rjson_0.2.21             ellipsis_0.3.2           XVector_0.34.0          
##   [7] fs_1.5.2                 rstudioapi_0.13          farver_2.1.1            
##  [10] bit64_4.0.5              fansi_1.0.3              lubridate_1.8.0         
##  [13] xml2_1.3.3               R.methodsS3_1.8.2        cachem_1.0.6            
##  [16] knitr_1.40               jsonlite_1.8.0           Rsamtools_2.10.0        
##  [19] broom_1.0.1              dbplyr_2.2.1             R.oo_1.25.0             
##  [22] compiler_4.1.3           httr_1.4.4               backports_1.4.1         
##  [25] assertthat_0.2.1         Matrix_1.5-1             fastmap_1.1.0           
##  [28] gargle_1.2.1             cli_3.4.1                htmltools_0.5.3         
##  [31] tools_4.1.3              gtable_0.3.1             glue_1.6.2              
##  [34] GenomeInfoDbData_1.2.7   rappdirs_0.3.3           Rcpp_1.0.9              
##  [37] carData_3.0-5            cellranger_1.1.0         jquerylib_0.1.4         
##  [40] vctrs_0.4.2              Biostrings_2.62.0        rtracklayer_1.54.0      
##  [43] xfun_0.33                rvest_1.0.3              lifecycle_1.0.2         
##  [46] restfulr_0.0.15          rstatix_0.7.0            XML_3.99-0.10           
##  [49] googlesheets4_1.0.1      zlibbioc_1.40.0          scales_1.2.1            
##  [52] vroom_1.5.7              hms_1.1.2                parallel_4.1.3          
##  [55] yaml_2.3.5               curl_4.3.2               memoise_2.0.1           
##  [58] gridExtra_2.3            sass_0.4.2               stringi_1.7.8           
##  [61] RSQLite_2.2.17           highr_0.9                BiocIO_1.4.0            
##  [64] filelock_1.0.2           BiocParallel_1.28.3      rlang_1.0.6             
##  [67] pkgconfig_2.0.3          bitops_1.0-7             evaluate_0.16           
##  [70] lattice_0.20-45          labeling_0.4.2           GenomicAlignments_1.30.0
##  [73] bit_4.0.4                tidyselect_1.1.2         magrittr_2.0.3          
##  [76] R6_2.5.1                 generics_0.1.3           DelayedArray_0.20.0     
##  [79] DBI_1.1.3                pillar_1.8.1             haven_2.5.1             
##  [82] withr_2.5.0              abind_1.4-5              RCurl_1.98-1.8          
##  [85] modelr_0.1.9             crayon_1.5.2             car_3.1-0               
##  [88] utf8_1.2.2               BiocFileCache_2.2.1      tzdb_0.3.0              
##  [91] rmarkdown_2.16           readxl_1.4.1             grid_4.1.3              
##  [94] data.table_1.14.2        blob_1.2.3               reprex_2.0.2            
##  [97] digest_0.6.29            R.utils_2.12.0           munsell_0.5.0           
## [100] bslib_0.4.0              sessioninfo_1.2.2